library

library(Seurat)
library(patchwork)
library(dplyr)
library(tidyr)
library(ggplot2)
library(reshape2)
library(tidyverse)
library(ggsci)
library(monocle3)
library(data.table)
library(readxl)
library(ggplot2)
library(ggpubr)
library(pheatmap)
library(plyr)
library(SCENIC)
library(ggrepel)
library(ComplexHeatmap)
library(circlize)
library(clusterProfiler)
library(msigdbr)
library(org.Mm.eg.db)
library(infercnv)
library(rstatix)
library(ArchR)
library(RColorBrewer)
library(ggalluvial)
library(org.Hs.eg.db)
library(parallel)
library(SCopeLoomR)
library(AUCell)
library(SCENIC)
library(KernSmooth)
library(plotly)
library(BiocParallel)
library(grid)

Path

color

col_cell <- c("#D51F26","#F47D2B","#89288F")
col_sample <- c("#89C75F","#C06CAB","#F37B7D")
col_celltype_new <- c(
 "#D51F26","#89C75F","#1E78B4","#EACEE3","#A1CDE1" ,"#eebb44", #'HSC','MPP','LMPP','MLP','GMP','MEP'
 "#90D5E4","#208A42","#8A9FD1","#F47D2B","#89288F","grey")
 
col_CD34 <- c("#F37B7D","#89C75F","#1E78B4","#EACEE3","#A1CDE1" ,"#eebb44")

# col_cds <- c("#F7FCFD","#E0ECF4","#BFD3E6","#9EBCDA","#8C96C6","#8C6BB1","#88419D") 
col_cds <- c("#E0ECF4","#BFD3E6","#9EBCDA","#8C96C6","#8C6BB1","#88419D","#4D004B")

col_B=c("#208A42","#90D5E4","#8A9FD1","#F47D2B","#D51F26","#88419D")
col_B2=c( "#90D5E4","#208A42","#8A9FD1","#F47D2B","grey")

col_sample_s <- c("#E1F1D7","#C3E3AF","#A6D587","#89C75F","#FCDEDE","#F9BDBE","#F59C9D","#F37B7D")
col_type <- c("#89C75F","#F37B7D")

rdwhbu <- colorRampPalette(c("navy", "white", "brown3"))

Figure1

10X scRNA

data

MM <- readRDS("/media/liyaru/LYR/MM2023/5_Result/7_Seurat/MM_new.rds")
BC <- readRDS("/media/liyaru/LYR/MM2023/5_Result/7_Seurat/BC_new.rds")

print(MM)
## An object of class Seurat 
## 27103 features across 17465 samples within 4 assays 
## Active assay: RNA (25051 features, 0 variable features)
##  3 other assays present: prediction.score.celltype, predicted_ADT, integrated
##  3 dimensional reductions calculated: pca, umap, tsne
print(BC)
## An object of class Seurat 
## 23625 features across 7203 samples within 2 assays 
## Active assay: RNA (21625 features, 0 variable features)
##  1 other assay present: integrated
##  2 dimensional reductions calculated: pca, umap

UMAP

p1 <- DimPlot(MM, group.by =  "cell",cols = col_cell,pt.size = 0.6)
p2 <- DimPlot(MM,group.by = "celltype_new",cols = col_celltype_new,pt.size = 0.6)
p3 <- DimPlot(MM, group.by =  "sample",cols = col_sample,pt.size = 0.6)

p1 + p2 + p3

HSC UMAP zoom in

Idents(MM) <- MM$cell
CD34 <- subset(MM,idents="CD34")
DimPlot(CD34,group.by = "celltype_new",cols = col_celltype_new,pt.size = 1)+
  xlim(0,5)+
  ylim(-15,-6)
## Warning: Removed 63 rows containing missing values or values outside the scale range
## (`geom_point()`).

QC

feature count

MM$cell <- factor(MM$cell,levels = c("CD34","CD19","CD138"))

p <- VlnPlot(MM,features = c("nCount_RNA","nFeature_RNA","percent.mt"),
        group.by = "cell",split.by ="sample" ,cols = col_sample,
        pt.size = 0) + 
  theme(legend.position = "right")
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.
p

Marker gene exp

HSPC_marker <- c("CD34","HLF","AVP","CRHBP", # HSC
                 "FLT3","DNTT", # LP
                 "MPO","ELANE", # GMP
                 "PBX1","KLF1" ) # MEP
                 
BC_marker <- c(# "FLT3",
              "CD19","CD79A",
               "IL7R","MME", # CD10,
              "CD27","MS4A1")

Plasma_marker <- c("SDC1","CD38","IGKC",
                   "TNFRSF17" # BCMA
                   )

combined_plot <- NULL

for (g in c(HSPC_marker,BC_marker,Plasma_marker)){
  if (g %in% HSPC_marker){
    cols = c(alpha("#A1CDE1",0.3),"#D51F26")
  }
  if (g %in% BC_marker){
    cols = c(alpha("#A1CDE1",0.3),"#F47D2B")
  }
  if (g %in% Plasma_marker){
    cols = c(alpha("#A1CDE1",0.3),"#89288F")
  }
  
  p <- FeaturePlot(MM,features = g,slot = "data",ncol = 4,pt.size = 0.6,order = T,
            cols = cols)
  
  if (is.null(combined_plot)){
    combined_plot <- p[[1]]
  }else{
    combined_plot <- combined_plot + p[[1]]
  }
}
print(combined_plot)

# p1 <- FeaturePlot(MM,features = HSPC_marker,slot = "data",ncol = 4,pt.size = 0.6,
#             cols = c(alpha("#A1CDE1",0.3),"#D51F26"),
#             order = T)
# 
# p2 <- FeaturePlot(MM,features = BC_marker,slot = "data",ncol = 4,pt.size = 0.6,
#             cols = c(alpha("#A1CDE1",0.3),"#F47D2B"),
#             order = T)
# 
# p3 <- FeaturePlot(MM,features = Plasma_marker,slot = "data",ncol = 4,pt.size = 0.6,
#             cols = c(alpha("#A1CDE1",0.3),"#89288F"),
#             order = T)
HSPC exp zoom in
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for x is already present.
## Adding another scale for x, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.

Dotplot (average exp)

DotPlot(MM,,features = c(HSPC_marker,BC_marker,Plasma_marker),
        group.by = "celltype_new",cols = c("white","#D51F26")) + RotatedAxis()

Correlation

t = AverageExpression(MM,group.by = c("sample","cell"),slot = "scale.data")
tt = t[["integrated"]]
ttt = cor(tt,method = "spearman")

col_dist = dist(ttt)
hclust1 <- hclust(col_dist)
myorder = c("HD_CD34","MM1_CD34","MM2_CD34","HD_CD19","MM1_CD19","MM2_CD19",
            "HD_CD138","MM1_CD138","MM2_CD138")

dend = reorder(as.dendrogram(hclust1),wts=order(match(myorder,rownames(ttt))),agglo.FUN = max)
col_cluster <- as.hclust(dend)

group = colnames(ttt)
names(group) = NULL
group = as.data.frame(group)
rownames(group) <- group$group
group <- separate(group,col="group",into = c("Class","Cell"))


cell_col <- col_cell
names(cell_col) <- c("CD34","CD19",'CD138')

class_col <- col_sample
names(class_col) <- c( "HD","MM1",'MM2')
ann_colors <- list(Cell=cell_col,Class=class_col)

p <- pheatmap(ttt,annotation_row = group,
              annotation_colors = ann_colors)
p

Raito

All cells

m <- MM@meta.data
m$number=1

table(m$cell,m$sample)
##        
##           HD  MM1  MM2
##   CD34   699  674  334
##   CD19  2879 3639  839
##   CD138  256 3851 4294
table(m$sample,m$celltype_new)
##      
##        HSC  MPP LMPP  MLP  GMP  MEP Prog_B 1 Prog_B 2 Naive B Memory B
##   HD   629   13    9   23   17    8     1381      432     616      367
##   MM1  232    7   33   83  185  134        1       35    1501     2075
##   MM2   78    0   22  190   18   26       70       33     367      366
##      
##       Plasmablast Other
##   HD          338     1
##   MM1        3857    21
##   MM2        4296     1
p1 <- ggplot(m,aes(sample,number,fill=celltype_new))+
  geom_bar(stat="identity",position="stack")+
  theme_classic(base_size = 16)+
  scale_fill_manual(values =  col_celltype_new)+
  RotatedAxis()

m <- ddply(m,'sample',transform,percent = 1/sum(number)*100)
p2 <- ggplot(m,aes(sample,percent,fill=celltype_new))+
  geom_bar(stat="identity",position="stack")+
  theme_classic(base_size = 16)+
  scale_fill_manual(values =  col_celltype_new)+
  RotatedAxis()

p1 + p2

CD34+ cells

m <- MM@meta.data
m <- m[m$cell =="CD34",]
m$number=1

p1 <- ggplot(m,aes(sample,number,fill=celltype_new))+
  geom_bar(stat="identity",position="stack")+
  theme_classic(base_size = 16)+
  scale_fill_manual(values = col_CD34)+
  RotatedAxis()

m <- ddply(m,'sample',transform,percent = 1/sum(number)*100)
p2 <- ggplot(m,aes(sample,percent,fill=celltype_new))+
  geom_bar(stat="identity",position="stack")+
  theme_classic(base_size = 16)+
  scale_fill_manual(values = col_CD34)+
  RotatedAxis()

p1 + p2

prop test
t = table(m$sample,m$celltype_new)
ratio = prop.table(t) %>% as.data.frame()
print(ratio)
##    Var1        Var2        Freq
## 1    HD         HSC 0.368482718
## 2   MM1         HSC 0.135910955
## 3   MM2         HSC 0.045694200
## 4    HD         MPP 0.007615700
## 5   MM1         MPP 0.004100762
## 6   MM2         MPP 0.000000000
## 7    HD        LMPP 0.005272408
## 8   MM1        LMPP 0.019332162
## 9   MM2        LMPP 0.012888108
## 10   HD         MLP 0.013473931
## 11  MM1         MLP 0.048623316
## 12  MM2         MLP 0.111306385
## 13   HD         GMP 0.009958992
## 14  MM1         GMP 0.108377270
## 15  MM2         GMP 0.010544815
## 16   HD         MEP 0.004686585
## 17  MM1         MEP 0.078500293
## 18  MM2         MEP 0.015231400
## 19   HD    Prog_B 1 0.000000000
## 20  MM1    Prog_B 1 0.000000000
## 21  MM2    Prog_B 1 0.000000000
## 22   HD    Prog_B 2 0.000000000
## 23  MM1    Prog_B 2 0.000000000
## 24  MM2    Prog_B 2 0.000000000
## 25   HD     Naive B 0.000000000
## 26  MM1     Naive B 0.000000000
## 27  MM2     Naive B 0.000000000
## 28   HD    Memory B 0.000000000
## 29  MM1    Memory B 0.000000000
## 30  MM2    Memory B 0.000000000
## 31   HD Plasmablast 0.000000000
## 32  MM1 Plasmablast 0.000000000
## 33  MM2 Plasmablast 0.000000000
## 34   HD       Other 0.000000000
## 35  MM1       Other 0.000000000
## 36  MM2       Other 0.000000000
# MM1 vs HD
tt = matrix(t[1:2,1:6],nrow = 2)
# tt = t(tt) # same result
# fisher.test(tt,simulate.p.value=TRUE)
print(chisq.test(tt))
## 
##  Pearson's Chi-squared test
## 
## data:  tt
## X-squared = 483.76, df = 5, p-value < 2.2e-16
# MM2 vs HD
tt = matrix(t[c(1,3),1:6],nrow = 2)
print(chisq.test(tt))
## Warning in chisq.test(tt): Chi-squared近似算法有可能不准
## 
##  Pearson's Chi-squared test
## 
## data:  tt
## X-squared = 524.93, df = 5, p-value < 2.2e-16

CD34+ cells HD vs MM

m <- MM@meta.data
m <- m[m$cell =="CD34",]

m$type = m$sample
m[m$type != 'HD','type'] = 'MM'

m$number=1

p1 <- ggplot(m,aes(type,number,fill=celltype_new))+
  geom_bar(stat="identity",position="stack")+
  theme_classic(base_size = 16)+
  scale_fill_manual(values = col_CD34)+
  RotatedAxis()+
  NoLegend()

m <- ddply(m,'type',transform,percent = 1/sum(number)*100)
p2 <- ggplot(m,aes(type,percent,fill=celltype_new))+
  geom_bar(stat="identity",position="stack")+
  theme_classic(base_size = 16)+
  scale_fill_manual(values = col_CD34)+
  RotatedAxis()

p1 + p2

prop test
t = table(m$type,m$celltype_new)
t
##     
##      HSC MPP LMPP MLP GMP MEP Prog_B 1 Prog_B 2 Naive B Memory B Plasmablast
##   HD 629  13    9  23  17   8        0        0       0        0           0
##   MM 310   7   55 273 203 160        0        0       0        0           0
##     
##      Other
##   HD     0
##   MM     0
prop.table(t[1,1:6])
##        HSC        MPP       LMPP        MLP        GMP        MEP 
## 0.89985694 0.01859800 0.01287554 0.03290415 0.02432046 0.01144492
prop.table(t[2,1:6])
##         HSC         MPP        LMPP         MLP         GMP         MEP 
## 0.307539683 0.006944444 0.054563492 0.270833333 0.201388889 0.158730159
print(table(m$type))
## 
##   HD   MM 
##  699 1008
# MM vs HD all
tt = matrix(t[1:2,1:6],nrow = 2)
print(chisq.test(tt))
## 
##  Pearson's Chi-squared test
## 
## data:  tt
## X-squared = 613.32, df = 5, p-value < 2.2e-16
# HSC
prop.test(c(629,310),c(699,1008)) # HSC HD, HSC MM, HD all, MM all
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(629, 310) out of c(699, 1008)
## X-squared = 582.74, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  0.5549560 0.6296785
## sample estimates:
##    prop 1    prop 2 
## 0.8998569 0.3075397
# LMPP
prop.test(c(9,55),c(699,1008)) 
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(9, 55) out of c(699, 1008)
## X-squared = 18.74, df = 1, p-value = 1.498e-05
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.05922236 -0.02415355
## sample estimates:
##     prop 1     prop 2 
## 0.01287554 0.05456349

B cells

m <- BC@meta.data
m$number=1

p1 <- ggplot(m,aes(sample,number,fill=celltype_BMNC))+
  geom_bar(stat="identity",position="stack")+
  theme_classic(base_size = 16)+
  scale_fill_manual(values = col_B2)+
  RotatedAxis()+
  NoLegend()

m <- ddply(m,'sample',transform,percent = 1/sum(number)*100)
p2 <- ggplot(m,aes(sample,percent,fill=celltype_BMNC))+
  geom_bar(stat="identity",position="stack")+
  theme_classic(base_size = 16)+
  scale_fill_manual(values = col_B2)+
  RotatedAxis()

p1 + p2

prop test
t = table(m$sample,m$celltype_BMNC)

# MM1 vs HD
tt = matrix(t[1:2,],nrow = 2)
print(chisq.test(tt))
## 
##  Pearson's Chi-squared test
## 
## data:  tt
## X-squared = 3319.2, df = 4, p-value < 2.2e-16
# MM2 vs HD
tt = matrix(t[c(1,3),],nrow = 2)
print(chisq.test(tt))
## Warning in chisq.test(tt): Chi-squared近似算法有可能不准
## 
##  Pearson's Chi-squared test
## 
## data:  tt
## X-squared = 751.7, df = 4, p-value < 2.2e-16

B cells HD vs MM

m <- BC@meta.data
m$number=1

p1 <- ggplot(m,aes(type,number,fill=celltype_BMNC))+
  geom_bar(stat="identity",position="stack")+
  theme_classic(base_size = 16)+
  scale_fill_manual(values = col_B2)+
  RotatedAxis()+
  NoLegend()

m <- ddply(m,'type',transform,percent = 1/sum(number)*100)
p2 <- ggplot(m,aes(type,percent,fill=celltype_BMNC))+
  geom_bar(stat="identity",position="stack")+
  theme_classic(base_size = 16)+
  scale_fill_manual(values = col_B2)+
  RotatedAxis()

p1 + p2

prop test
t = table(m$type,m$celltype_BMNC)
print(t)
##     
##      Prog_B 1 Prog_B 2 Naive B Memory B Other
##   HD     1368      431     616      367     1
##   MM       70       31    1868     2431    20
prop.table(t[1,1:5])
##     Prog_B 1     Prog_B 2      Naive B     Memory B        Other 
## 0.4915558750 0.1548688466 0.2213438735 0.1318720805 0.0003593245
prop.table(t[2,1:5])
##    Prog_B 1    Prog_B 2     Naive B    Memory B       Other 
## 0.015837104 0.007013575 0.422624434 0.550000000 0.004524887
table(m$type)
## 
##   HD   MM 
## 2783 4420
# MM vs HD
tt = matrix(t[1:2,],nrow = 2)
print(chisq.test(tt))
## 
##  Pearson's Chi-squared test
## 
## data:  tt
## X-squared = 3497.3, df = 4, p-value < 2.2e-16
prop.test(c(367,2431),c(2783,4420)) 
## 
##  2-sample test for equality of proportions with continuity correction
## 
## data:  c(367, 2431) out of c(2783, 4420)
## X-squared = 1255.1, df = 1, p-value < 2.2e-16
## alternative hypothesis: two.sided
## 95 percent confidence interval:
##  -0.4377372 -0.3985186
## sample estimates:
##    prop 1    prop 2 
## 0.1318721 0.5500000

cytotrace2

## Cal cytotrace score
# library(CytoTRACE2) 
# cytotrace2_result <- cytotrace2(BC,
#                                 species = "human",
#                                 is_seurat =  T,
#                                 ncores = 10)
# 
# BC = AddMetaData(BC,cytotrace2_result$CytoTRACE2_Score,col.name = "CytoTRACE2_Score")
# BC = AddMetaData(BC,cytotrace2_result$CytoTRACE2_Relative,col.name = "CytoTRACE2_Relative")
# saveRDS(BC,"/media/liyaru/LYR/MM2023/5_Result/7_Seurat/BC_new.rds")

# VlnPlot(BC,features = 'CytoTRACE2_Score',group.by = "celltype_BMNC",split.by = "type")
# VlnPlot(BC,features = 'CytoTRACE2_Relative',group.by = "celltype_BMNC",split.by = "type")
# VlnPlot(BC,features = 'CytoTRACE2_Relative',group.by = "celltype_BMNC")
# FeaturePlot(BC,features = 'CytoTRACE2_Relative')

t <- BC@meta.data

p <- ggboxplot(t, x="celltype_BMNC",
               y="CytoTRACE2_Relative",
               fill = "celltype_BMNC",
               #add = "jitter",
               palette = col_B2,
               outlier.shape=NA)+
  RotatedAxis()
p

p <- ggboxplot(t, x="type",
               y="CytoTRACE2_Relative",
               fill = 'type',
               outlier.shape=NA,
               palette =  col_type)+
  stat_compare_means(method = "t.test")

p

SMART-seq2

Data

# Seurat object
MMsmart <- readRDS("/media/liyaru/LYR/MM2023/5_Result/1_RDS/MMsmart.rds")
HD_cds <- readRDS("/media/liyaru/LYR/MM2023/5_Result/1_RDS/monocle_HD.rds")
#MM_cds <- readRDS("/media/liyaru/LYR/MM2023/5_Result/1_RDS/monocle_MM.rds")
HD <- readRDS("/media/liyaru/LYR/MM2023/5_Result/1_RDS/MMsmart_HD.rds")
MM_project <- readRDS("/media/liyaru/LYR/MM2023/5_Result/1_RDS/MMsmart_MM_project.rds")

print(MMsmart)
## An object of class Seurat 
## 36127 features across 5367 samples within 1 assay 
## Active assay: RNA (36127 features, 2000 variable features)
##  2 dimensional reductions calculated: pca, umap
print(HD_cds)
## class: cell_data_set 
## dim: 36127 2711 
## metadata(2): cds_version citations
## assays(1): counts
## rownames(36127): TSPAN6 TNMD ... H2AQ1P NBEAP6
## rowData names(1): gene_short_name
## colnames(2711): L10.AAACATCG L10.AACAACCA ... L82.TGGTGGTA L82.TTCACGCA
## colData names(9): orig.ident nCount_RNA ... class Size_Factor
## reducedDimNames(2): PCA UMAP
## mainExpName: NULL
## altExpNames(0):
# print(MM_cds)
print(HD)
## An object of class Seurat 
## 36127 features across 2711 samples within 1 assay 
## Active assay: RNA (36127 features, 2000 variable features)
##  2 dimensional reductions calculated: pca, umap
print(MM_project)
## An object of class Seurat 
## 36134 features across 2656 samples within 2 assays 
## Active assay: RNA (36127 features, 2000 variable features)
##  1 other assay present: prediction.score.celltype
##  4 dimensional reductions calculated: pca, umap, ref.pca, ref.umap

QC

feature count

table(MMsmart$Celltype,MMsmart$replicate)
##           
##            HD01 HD02 HD03 HD04 MM01 MM02 MM03 MM04
##   Pro        88   87   72   76   74   66   67   79
##   Pre        71   87   85   72   69   68   62   79
##   Immature   85   88   79   66   85   61   79   72
##   Breg      171   84  169  181  171  147  165  155
##   NaiveB    181  176  136  159  139  150  175  167
##   Memory B   88   90   71   63   87   85   91   85
##   Plasma     48   57   30   51   31   62   27   58
p <- VlnPlot(MMsmart,features = c("nCount_RNA","nFeature_RNA"),
        group.by = "replicate",
        pt.size = 0,
        col=col_sample_s) + 
  theme(legend.position = "right")
p

Marker gene exp

HSPC_marker <- c("CD34", # HSC
                 "DNTT") # LP 
                 
BC_marker <- c(# "FLT3",
              "CD19","CD79A",
               "IL7R","MME", # CD10,
              "CD27","MS4A1")

Plasma_marker <- c("SDC1","CD38","IGKC",
                   "TNFRSF17" # BCMA
                   )

combined_plot <- NULL

for (g in c(HSPC_marker,BC_marker,Plasma_marker)){
  if (g %in% HSPC_marker){
    cols = c(alpha("#A1CDE1",0.3),"#D51F26")
  }
  if (g %in% BC_marker){
    cols = c(alpha("#A1CDE1",0.3),"#F47D2B")
  }
  if (g %in% Plasma_marker){
    cols = c(alpha("#A1CDE1",0.3),"#89288F")
  }
  
  p <- FeaturePlot(MMsmart,features = g,slot = "data",ncol = 4,pt.size = 1,order = T,
            cols = cols)
  
  if (is.null(combined_plot)){
    combined_plot <- p[[1]]
  }else{
    combined_plot <- combined_plot + p[[1]]
  }
}
print(combined_plot)

DotPlot(MMsmart,features = c(HSPC_marker,BC_marker,Plasma_marker),
        group.by = "Celltype", cols = c("white","#D51F26")) + RotatedAxis()

Monocle3 trajectory

p1 <- plot_cells(HD_cds ,color_cells_by ="Celltype",label_cell_groups = FALSE,
           cell_size = 0.6,group_label_size = 10,label_branch_points = F,
           label_leaves = F,label_roots = F,
           label_principal_points = F)+ 
  facet_wrap("~class", nrow = 1)+
  scale_color_manual(values=col_cds)+
  NoLegend()
p1 

# p2 <- plot_cells(MM_cds ,color_cells_by ="Celltype",label_cell_groups = FALSE,
#            cell_size = 0.6,group_label_size = 10,label_branch_points = F,
#            label_leaves = F,label_roots = F,
#            label_principal_points = F)+ 
#   facet_wrap("~class", nrow = 1)+
#   scale_color_manual(values=col_cds)
# 
# p1+p2
p1 <- DimPlot(HD,reduction = "umap",group.by = "Celltype",cols = col_cds)
p2 <- DimPlot(MM_project,reduction = "ref.umap",group.by = "Celltype",cols = col_cds)
p1 + p2

p1 <- DimPlot(HD,reduction = "umap",group.by = "Celltype",cols = col_cds,
        split.by = "Celltype")+NoLegend()

p2 <- DimPlot(MM_project,reduction = "ref.umap",group.by = "Celltype",cols = col_cds,
        split.by = "Celltype")+NoLegend()

p1 / p2

DimPlot(MMsmart,group.by = "class",cols = c("#89C75F","#F37B7D"))

UMAP distribution

# emb = MMsmart@reductions$umap@cell.embeddings %>% as.data.frame()
# emb$orig.ident = rownames(emb)
# meta = MMsmart@meta.data
# m <- merge(emb,meta,by="orig.ident")
# 
# mm <- m[m$Celltype == 'Plasma',]
# 
# ggplot(mm, aes(x = UMAP_2, fill = class)) +
#   geom_density(alpha = 0.3)

Slingshot

HD

library(slingshot)
## 载入需要的程辑包:princurve
## 载入需要的程辑包:TrajectoryUtils
HD.sce = as.SingleCellExperiment(HD)
sce_slingshot1 <- slingshot(HD.sce,      
                            reducedDim = 'UMAP',  
                            clusterLabels = HD.sce$Celltype,  
                            start.clus = 'Pro')
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
saveRDS(sce_slingshot1,'/media/liyaru/LYR/MM2023/5_Result/7_Seurat/Smartseq_slingshot_HD.rds')

sce_slingshot1 <- readRDS('/media/liyaru/LYR/MM2023/5_Result/7_Seurat/Smartseq_slingshot_HD.rds')

plot(reducedDims(sce_slingshot1)$UMAP, 
     col = col_cds[HD.sce$Celltype], 
     cex = 0.4,
     pch=16, asp = 1)
lines(SlingshotDataSet(sce_slingshot1), lwd=2, col='black')

# brach
pt1 = sce_slingshot1$slingPseudotime_1
pt2 = sce_slingshot1$slingPseudotime_2

pt1 = pt1 / length(pt1[!is.na(pt1)]) * 1000
pt2 = pt2 / length(pt2[!is.na(pt2)]) * 1000

colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100)
plotcol <- colors[cut(pt1, breaks=100)]
plot(reducedDims(sce_slingshot1)$UMAP,
     col = plotcol,
     pch=16, asp = 1)

colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100)
plotcol <- colors[cut(pt2, breaks=100)]
plot(reducedDims(sce_slingshot1)$UMAP,
     col = plotcol,
     pch=16, asp = 1)

pt = c()
# pt = (pt1 + pt2)/2
for (i in 1:length(pt1)){
  if(is.na(pt1[i])){
    pt_i = pt2[i]
  }else if (is.na(pt2[i])){
    pt_i = pt1[i]
  }else{
    pt_i = (pt1[i] + pt2[i])/2
  }
  pt = c(pt,pt_i)
}
names(pt) <- colnames(sce_slingshot1)
names(pt1) <- colnames(sce_slingshot1)
names(pt2) <- colnames(sce_slingshot1)

HD <- AddMetaData(HD,pt,col.name = "psuedotime")
HD <- AddMetaData(HD,pt1,col.name = "psuedotime1")
HD <- AddMetaData(HD,pt2,col.name = "psuedotime2")

# VlnPlot(HD,features = "psuedotime1",group.by = "Celltype")
# VlnPlot(HD,features = "psuedotime2",group.by = "Celltype")
# VlnPlot(HD,features = "psuedotime",group.by = "Celltype")

t <- HD@meta.data
p <- ggboxplot(t, x="Celltype",
               y="psuedotime",
               fill = "Celltype",
               #add = "jitter",
               palette = col_cds,
               outlier.shape=NA)+
  RotatedAxis()
p

medians <- aggregate(psuedotime ~ Celltype, t, median, na.rm = TRUE)
colors <- colorRampPalette(brewer.pal(11,'Spectral'))(100)
plotcol <- colors[cut(pt, breaks=100)]
plot(reducedDims(sce_slingshot1)$UMAP,
     col = plotcol,
     pch=16, asp = 1)
lines(SlingshotDataSet(sce_slingshot1), lwd=2, col='black')

MM

MM2 = subset(MMsmart,ident = 'MM')
MM.sce = as.SingleCellExperiment(MM2)
sce_slingshot1 <- slingshot(MM.sce,     
                            reducedDim = 'UMAP', 
                            clusterLabels = MM.sce$Celltype,  
                            start.clus = 'Pro')    
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = FALSE.
# SlingshotDataSet(sce_slingshot1)
saveRDS(sce_slingshot1,'/media/liyaru/LYR/MM2023/5_Result/7_Seurat/Smartseq_slingshot_MM.rds')

sce_slingshot1 <- readRDS('/media/liyaru/LYR/MM2023/5_Result/7_Seurat/Smartseq_slingshot_MM.rds')

plot(reducedDims(sce_slingshot1)$UMAP, 
     col = col_cds[MM.sce$Celltype], 
     cex = 0.4,
     pch=16, asp = 1)

lines(SlingshotDataSet(sce_slingshot1), lwd=2, col='black')

# brach
pt1 = sce_slingshot1$slingPseudotime_1
pt2 = sce_slingshot1$slingPseudotime_2

pt1 = pt1 / length(pt1[!is.na(pt1)]) * 1000
pt2 = pt2 / length(pt2[!is.na(pt2)]) * 1000

colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100)
plotcol <- colors[cut(pt1, breaks=100)]
plot(reducedDims(sce_slingshot1)$UMAP,
     col = plotcol,
     pch=16, asp = 1)

colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100)
plotcol <- colors[cut(pt2, breaks=100)]
plot(reducedDims(sce_slingshot1)$UMAP,
     col = plotcol,
     pch=16, asp = 1)

pt = c()
# pt = (pt1 + pt2)/2
for (i in 1:length(pt1)){
  if(is.na(pt1[i])){
    pt_i = pt2[i]
  }else if (is.na(pt2[i])){
    pt_i = pt1[i]
  }else{
    pt_i = (pt1[i] + pt2[i])/2
  }
  pt = c(pt,pt_i)
}
names(pt) <- colnames(sce_slingshot1)
names(pt1) <- colnames(sce_slingshot1)
names(pt2) <- colnames(sce_slingshot1)

MM2 <- AddMetaData(MM2,pt,col.name = "psuedotime")
MM2 <- AddMetaData(MM2,pt1,col.name = "psuedotime1")
MM2 <- AddMetaData(MM2,pt2,col.name = "psuedotime2")

# VlnPlot(HD,features = "psuedotime1",group.by = "Celltype")
# VlnPlot(HD,features = "psuedotime2",group.by = "Celltype")
# VlnPlot(HD,features = "psuedotime",group.by = "Celltype")

t <- MM2@meta.data
p <- ggboxplot(t, x="Celltype",
               y="psuedotime",
               fill = "Celltype",
               #add = "jitter",
               palette = col_cds,
               outlier.shape=NA)+
  RotatedAxis()
p

colors <- colorRampPalette(brewer.pal(11,'Spectral'))(100)
plotcol <- colors[cut(pt, breaks=100)]
plot(reducedDims(sce_slingshot1)$UMAP, 
     col = plotcol, 
     pch=16, asp = 1)
lines(SlingshotDataSet(sce_slingshot1), lwd=2, col='black')

Correlation

t = AverageExpression(MMsmart,group.by = c("class","Celltype"),slot = "scale.data")
tt = t[["RNA"]]
ttt = cor(tt,method = "spearman")

group = colnames(ttt)
names(group) = NULL
group = as.data.frame(group)
rownames(group) <- group$group
group <- separate(group,col="group",into = c("Class","Cell"))

cell_col <- col_cds
names(cell_col) <- c( "Pro","Pre","Immature","NaiveB","Breg","Memory","Plasma")

class_col <- c("#89C75F","#F37B7D") 
names(class_col) <- c( "HD","MM")
ann_colors <- list(Cell=cell_col,Class=class_col)

p <- pheatmap(ttt,annotation_row = group,
              annotation_colors = ann_colors)
p

# sample_correlation <- ttt %>% as.data.frame()
# fwrite(sample_correlation,"/media/liyaru/LYR/MM2023/6_PDF/Figure/Figure1H_spearman_correlation.csv",row.names = T)

cytotrace2

# library(CytoTRACE2)
# cytotrace2_result <- cytotrace2(MMsmart,
#                                 species = "human",
#                                 is_seurat =  T,
#                                 ncores = 10)
# 
# MMsmart = AddMetaData(MMsmart,cytotrace2_result$CytoTRACE2_Score,col.name = "CytoTRACE2_Score")
# MMsmart = AddMetaData(MMsmart,cytotrace2_result$CytoTRACE2_Relative,col.name = "CytoTRACE2_Relative")
# 
# saveRDS(MMsmart,"/media/liyaru/LYR/MM2023/5_Result/1_RDS/MMsmart.rds")
# 
# FeaturePlot(MMsmart,features = 'CytoTRACE2_Relative')

t <- MMsmart@meta.data
t <- t[t$Celltype !='Plasma',]


p <- ggboxplot(t, x="Celltype",
               y="CytoTRACE2_Relative",
               fill = "Celltype",
               palette = col_cds,
               outlier.shape=NA)+
  RotatedAxis()
p

p <- ggboxplot(t, x="class",
               y="CytoTRACE2_Relative",
               fill = "class",
               palette = col_type,
               outlier.shape=NA)+
  stat_compare_means(method = "t.test")
p

Figure2

data

BC <- readRDS("/media/liyaru/LYR/MM2023/5_Result/7_Seurat/BC_new.rds")
print(BC)
## An object of class Seurat 
## 23625 features across 7203 samples within 2 assays 
## Active assay: RNA (21625 features, 0 variable features)
##  1 other assay present: integrated
##  2 dimensional reductions calculated: pca, umap

CNV result (from inferCNV)

HSC

knitr::include_graphics("/media/liyaru/LYR/MM2023/5_Result/inferCNV/HSC/inferCNV.png")

knitr::include_graphics("/media/liyaru/LYR/MM2023/5_Result/inferCNV/HSC/HMM.png")

BC

knitr::include_graphics("/media/liyaru/LYR/MM2023/5_Result/inferCNV/BC/inferCNV.png")

# HMM
knitr::include_graphics("/media/liyaru/LYR/MM2023/5_Result/inferCNV/BC/HMM.png")

Plasma

# HMM
knitr::include_graphics("/media/liyaru/LYR/MM2023/5_Result/inferCNV/P/inferCNV.png")

knitr::include_graphics("/media/liyaru/LYR/MM2023/5_Result/inferCNV/P/HMM.png")

CNV score

HSC

Chr1

df_chr1 <- fread("/media/liyaru/LYR/MM2023/5_Result/inferCNV/HSC/HSC_chr1_CNV_score.csv")

# Rmisc::CI(df_chr1$MM1,ci=0.05)
# Rmisc::CI(df_chr1$MM2,ci=0.05)

q = quantile(df_chr1$MM1,c(0.05,0.5,0.95))
print(q)
##        5%       50%       95% 
## 0.9642791 1.0072750 1.0201152
# t.test(df_chr1$HD2,df_chr1$MM1,paired = T)
# wilcox.test(df_chr1$HD2,df_chr1$MM1,paired = T)

p1 <- plot(df_chr1$start,df_chr1$MM1,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h=0.95,lty=2,col="navy")+
  abline(h=1,lty=2,col="grey")+
  abline(h=1.05,lty=2,col="brown3")

q = quantile(df_chr1$MM2,c(0.05,0.5,0.95))
print(q)
##        5%       50%       95% 
## 0.9599260 0.9969523 1.0320886
p2 <- plot(df_chr1$start,df_chr1$MM2,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

Chr17

df_chr17 <- fread("/media/liyaru/LYR/MM2023/5_Result/inferCNV/HSC/HSC_chr17_CNV_score.csv")

q = quantile(df_chr17$MM1,c(0.05,0.5,0.95))
print(q)
##        5%       50%       95% 
## 0.9854408 1.0037500 1.0318202
p1 <- plot(df_chr17$start,df_chr17$MM1,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

q = quantile(df_chr17$MM2,c(0.05,0.5,0.95))
print(q)
##        5%       50%       95% 
## 0.9657920 0.9984778 1.0100204
p2 <- plot(df_chr17$start,df_chr17$MM2,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

BC group by 1q

Chr1

df_chr1 <- fread("/media/liyaru/LYR/MM2023/5_Result/inferCNV/BC/BC_chr1_CNV_score.csv")

# p1 <- plot(df_chr1$start,df_chr1$HD,type="l",ylim = c(0.8,1.2),lwd=3) + 
#   abline(h= 0.95,lty=2,col="navy")+
#   abline(h= 1,lty=2,col="grey")+
#   abline(h= 1.05,lty=2,col="brown3")

q = quantile(df_chr1$MM1.Chr1_dupli,c(0.05,0.5,0.95))
print(q)
##        5%       50%       95% 
## 0.9599969 0.9999678 1.0697965
p2 <- plot(df_chr1$start,df_chr1$MM1.Chr1_dupli,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

q = quantile(df_chr1$MM1.Other,c(0.05,0.5,0.95))
print(q)
##        5%       50%       95% 
## 0.9818824 1.0000517 1.0292792
p3 <- plot(df_chr1$start,df_chr1$MM1.Other,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

q = quantile(df_chr1$MM2.Chr1_dupli,c(0.05,0.5,0.95))
print(q)
##        5%       50%       95% 
## 0.9673445 0.9995793 1.0742516
p4 <- plot(df_chr1$start,df_chr1$MM2.Chr1_dupli,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

q = quantile(df_chr1$MM2.Other,c(0.05,0.5,0.95))
print(q)
##        5%       50%       95% 
## 0.9786280 0.9970898 1.0309999
p5 <- plot(df_chr1$start,df_chr1$MM2.Other,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

Chr17

df_chr <- fread("/media/liyaru/LYR/MM2023/5_Result/inferCNV/BC/BC_chr17_CNV_score.csv")

p2 <- plot(df_chr$start,df_chr$MM1.Chr1_dupli,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

p3 <- plot(df_chr$start,df_chr$MM1.Other,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

p4 <- plot(df_chr$start,df_chr$MM2.Chr1_dupli,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

p5 <- plot(df_chr$start,df_chr$MM2.Other,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

BC group by sample

Chr1

df_chr1 <- fread("/media/liyaru/LYR/MM2023/5_Result/inferCNV/BC/BC_chr1_CNV_score_sample.csv")

q = quantile(df_chr1$MM1,c(0.05,0.5,0.95))
print(q)
##        5%       50%       95% 
## 0.9842155 0.9974610 1.0343610
p1 <- plot(df_chr1$start,df_chr1$MM1,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

q = quantile(df_chr1$MM2,c(0.05,0.5,0.95))
print(q)
##        5%       50%       95% 
## 0.9805379 1.0000583 1.0380903
p2 <- plot(df_chr1$start,df_chr1$MM2,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

Chr17

df_chr <- fread("/media/liyaru/LYR/MM2023/5_Result/inferCNV/BC/BC_chr17_CNV_score_sample.csv")

p1 <- plot(df_chr$start,df_chr$MM1,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

p2 <- plot(df_chr$start,df_chr$MM2,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

Plasma

Chr1

df_chr1 <- fread("/media/liyaru/LYR/MM2023/5_Result/inferCNV/P/P_chr1_CNV_score.csv")

p1 <- plot(df_chr1$start,df_chr1$MM1,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

p2 <- plot(df_chr1$start,df_chr1$MM2,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

Chr17

df_chr17 <- fread("/media/liyaru/LYR/MM2023/5_Result/inferCNV/P/P_chr17_CNV_score.csv")

p1 <- plot(df_chr17$start,df_chr17$MM1,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

p2 <- plot(df_chr17$start,df_chr17$MM2,type="l",ylim = c(0.8,1.2),lwd=3)+ 
  abline(h= 0.95,lty=2,col="navy")+
  abline(h= 1,lty=2,col="grey")+
  abline(h= 1.05,lty=2,col="brown3")

Figure 3

data

BC <- readRDS("/media/liyaru/LYR/MM2023/5_Result/7_Seurat/BC_new.rds")
print(BC)
## An object of class Seurat 
## 23625 features across 7203 samples within 2 assays 
## Active assay: RNA (21625 features, 0 variable features)
##  1 other assay present: integrated
##  2 dimensional reductions calculated: pca, umap

Marker exp

Heatmap

DefaultAssay(BC) <- "RNA"
# BC <- ScaleData(BC)
# markers <- FindAllMarkers(BC)
# fwrite(markers,"/media/liyaru/LYR/MM2023/Github/Sup_tables/BC_markers.csv")

markers <- fread("/media/liyaru/LYR/MM2023/Github/Sup_tables/BC_markers.csv")
top_m <- markers %>% group_by(cluster) %>% top_n(n=5,wt=avg_log2FC)
mmm = top_m$gene

print(mmm)
##  [1] "HMGB2"      "STMN1"      "IGLL1"      "TUBA1B"     "HIST1H4C"  
##  [6] "HIST1H1C"   "CCDC191"    "SOX4"       "ACSM3"      "YBX3"      
## [11] "IL4R"       "FCER2"      "HBB"        "HBA2"       "HBA1"      
## [16] "GPR183"     "LINC01781"  "FOSB"       "AC020916.1" "KLF6"      
## [21] "CIB1"       "FCRL5"      "MS4A1"      "NEAT1"      "CRIP1"     
## [26] "S100A4"     "ITGB1"      "S100A10"    "CRIP1"      "S100A6"
# CRIP1 replicate

BC_downsample = subset(BC,downsample=200)
BC_downsample@assays$RNA@scale.data = rbind(BC_downsample@assays$RNA@scale.data, "CRIP1.2" = BC_downsample@assays$RNA@scale.data["CRIP1", ])

mmm[29] = "CRIP1.2"

DoHeatmap(BC_downsample,features = mmm,group.by = "celltype",group.colors =  col_B,
          draw.lines = T,lines.width=6)+
  scale_fill_gradientn(colors = c("#4575B4","white","#D73027"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

Vlnplot

B_surface <- c("CD19","MS4A1", #"CD20",
             "CD24","CD27","CD38")

B_pro <- c("BCL2","ZEB2","PAX5","KLF10","RUNX1")

pla_diff <- c("PRDM1","XBP1","SSR4","SEC61G","PPIB")
              
antigen <- c("TNFRSF13B","TNFRSF1B","ITGAX","FCRL5","CCR7")

DefaultAssay(BC) <- "RNA"
p <- VlnPlot(BC,features = c(B_surface,B_pro,pla_diff,antigen),
        group.by = "celltype",ncol = 5,pt.size = 0,slot="data")&
  scale_fill_manual(values = col_B)&
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())&
  labs(x="",y="")

p

Featureplot

col_gene <- colorRampPalette(c("navy", "white", "brown"))(100)[c(32:46,65:100)]
# col_gene <- colorRampPalette(c("navy", "white", "brown"))(100)
DefaultAssay(MM) <- "RNA"
DefaultAssay(BC) <- "RNA"

p1 <- FeaturePlot(MM,features = "CD24",slot = "data",cols = col_gene)

p2 <- FeaturePlot(MM,features = "FCRL5",slot = "data",cols = col_gene)

p3 <- FeaturePlot(BC,features = "CD24",slot = "data",cols = col_gene)

p4 <- FeaturePlot(BC,features = "FCRL5",slot = "data",cols = col_gene)

p1 + p2 | p3 + p4

GO

# calculate DEGs
# DefaultAssay(BC) <- "RNA"
# Idents(BC) <- BC$has_dupli_chr1
# DEG = FindMarkers(BC,ident.1 = "FALSE",ident.2 = "TRUE")
# fwrite(DEG,"/media/liyaru/LYR/MM2023/5_Result/CD19.CNV.DEG.csv",row.names = T)

DEG <- fread("/media/liyaru/LYR/MM2023/5_Result/CD19.CNV.DEG.csv") %>% as.data.frame()
# rownames(DEG) <- DEG$V1
# 
# # other BC vs 1q B cells, logFC < 0 means 1q upregulate
up = DEG[DEG$p_val_adj<0.05 & DEG$avg_log2FC < -0.5,] 
# 
# up$gene <- rownames(up)
# ego <- enrichGO(
#   gene          = up$gene,
#   keyType       = "SYMBOL",
#   OrgDb         =  org.Hs.eg.db,
#   ont           = "ALL",
#   pAdjustMethod = "BH",
#   pvalueCutoff  = 0.05,
#   qvalueCutoff  = 0.05,
#   #readable      = TRUE
# )
# GO  = ego@result
# fwrite(GO,"/media/liyaru/LYR/MM2023/5_Result/BC_1q_up_GO.csv")

GO <- fread("/media/liyaru/LYR/MM2023/5_Result/BC_1q_up_GO.csv") %>% as.data.frame()
term <- fread("/media/liyaru/LYR/MM2023/5_Result/CD19_CNV_up_GO_selected.csv") %>% as.data.frame()

GO <- GO[GO$Description %in% term$Description,]
GO <- separate(GO,col="GeneRatio",into = c("DEG","Geneset"),remove = F)
GO$GenePercent <- as.integer(GO$DEG) / as.integer(GO$Geneset) * 100
GO <- GO[order(GO$GenePercent),]
GO$Description <- factor(GO$Description,levels = GO$Description)

ggplot(GO,aes(x = GenePercent,y = Description))+
  geom_point(aes(
    color=p.adjust,
    size=Count))+
  scale_size()+
  scale_color_gradientn(colors = c(rdwhbu(100)[100:1]))+
  theme_bw()

UMAP

p1 <- DimPlot(BC,group.by = "integrated_snn_res.0.3",pt.size = 0.6)
p2 <- DimPlot(BC,group.by = "celltype_BMNC",cols=col_B2,pt.size = 0.6)
p3 <- DimPlot(BC,group.by = "celltype",cols=col_B,pt.size = 0.6)
p4 <- DimPlot(BC,group.by = "has_dupli_chr1" ,cols = c("grey","brown3"),pt.size = 0.6)

p1 + p2 + p3 + p4

Marker gene exp

HSPC_marker <- c("CD34", # HSC
                 "DNTT") # LP 
                 
BC_marker <- c(# "FLT3",
              "CD19","CD79A",
               "IL7R","MME", # CD10,
              "CD27","MS4A1")

Plasma_marker <- c("SDC1","CD38" # BCMA
                   )

combined_plot <- NULL

for (g in c(HSPC_marker,BC_marker,Plasma_marker)){
  if (g %in% HSPC_marker){
    cols = c(alpha("#A1CDE1",0.3),"#D51F26")
  }
  if (g %in% BC_marker){
    cols = c(alpha("#A1CDE1",0.3),"#F47D2B")
  }
  if (g %in% Plasma_marker){
    cols = c(alpha("#A1CDE1",0.3),"#89288F")
  }
  
  p <- FeaturePlot(BC,features = g,slot = "data",ncol = 4,pt.size = 1,order = T,
            cols = cols)
  
  if (is.null(combined_plot)){
    combined_plot <- p[[1]]
  }else{
    combined_plot <- combined_plot + p[[1]]
  }
}
print(combined_plot)

B cell in global UMAP

# # Add B cell inormation in MM seurat project
# Bcelltype  <- as.character(BC$celltype)
# names(Bcelltype) <- rownames(BC@meta.data)
# MM <- AddMetaData(MM,Bcelltype,col.name = "Bcelltype")
# 
# B_dupli_chr1  <- as.character(BC$has_dupli_chr1)
# names(B_dupli_chr1) <- rownames(BC@meta.data)
# MM <- AddMetaData(MM,B_dupli_chr1,col.name = "B_dupli_chr1")
# 
MM@meta.data$Bcelltype <- factor(MM$Bcelltype,levels = c("Pro B","Pre B","Naive B","Memory-Like 1","Memory-Like 2","Memory-Like 3"))

p1 <- DimPlot(MM,group.by = "Bcelltype",cols=col_B,pt.size = 0.6)
p2 <- DimPlot(MM,group.by = "B_dupli_chr1",cols=c("#BFD3E6","brown3"),pt.size = 0.6)

p1 + p2

UMAP distribution

emb = BC@reductions$umap@cell.embeddings %>% as.data.frame()
emb$cellname = rownames(emb)
meta = BC@meta.data
meta$cellname = rownames(meta)
m <- merge(emb,meta,by="cellname")

ggplot(m, aes(x = -UMAP_2, fill = has_dupli_chr1)) +
  geom_density(alpha = 0.3)+
  scale_fill_manual(values = c("#BFD3E6","brown3"))+ 
  theme_classic()

Figure 4

data

BCP <- readRDS("/media/liyaru/LYR/MM2023/5_Result/7_Seurat/BCP.rds")
# MM <- readRDS("/media/liyaru/LYR/MM2023/5_Result/7_Seurat/MM_new.rds")
# m = MM@meta.data
# m$BC_Plasma <- NA
# 
# # m[!is.na(m$B_dupli_chr1) & m$B_dupli_chr1 == "TRUE",'BC_Plasma'] <- 'BC_1q'
# # m[!is.na(m$B_dupli_chr1) & m$B_dupli_chr1 == "FALSE",'BC_Plasma'] <- 'BC_other'
# # m[m$cell =='CD138' & m$celltype_new =='Plasmablast','BC_Plasma'] <- 'Plasma'
# # MM@meta.data = m
# 
# m[!is.na(m$Bcelltype) & m$Bcelltype == "Memory-Like 2",'BC_Plasma'] <- "Memory-Like 2"
# m[!is.na(m$Bcelltype) & m$Bcelltype != "Memory-Like 2",'BC_Plasma'] <- 'BC_other'
# m[m$cell =='CD138' & m$celltype_new =='Plasmablast','BC_Plasma'] <- 'Plasma'
# MM@meta.data = m
# 
# cells <- rownames(MM@meta.data[!is.na(MM@meta.data$BC_Plasma),])
# table(MM@meta.data$BC_Plasma)
# BCP <- subset(MM,cells=cells)

Expression

col_BCP <- c("#bde0db","#eebabb","#ae8ec2")

gene = c("CD19","MS4A1","CD38","TNFRSF17",'GPRC5D')
chr1q = c("TXNIP","MCL1","PBXIP1","JTB",'ANP32E')

B_surface <- c("CD24","CD27")
B_pro <- c("BCL2","ZEB2","PAX5","KLF10","RUNX1")
pla_diff <- c("PRDM1","XBP1","SSR4","SEC61G","PPIB")
antigen <- c("TNFRSF13B","TNFRSF1B","ITGAX","FCRL5","CCR7")

p <- VlnPlot(BCP,group.by = 'BC_Plasma',assay = "RNA",slot = "data",ncol=6,
             pt.size = 0,cols = col_BCP,
             features = c(gene,chr1q,B_surface,B_pro,pla_diff,antigen))&
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank())&
  labs(x="",y="")

p

BCR

draw_plasma_top10_clone <- function(patient){
  path = "/media/liyaru/LYR/MM2023/5_Result/14_BCR/BCR_all/result/1.CloneStat/Total/"
  
  CD24 = fread(paste0(path,patient[1]))
  FCRL5 = fread(paste0(path,patient[2]))
  PC = fread(paste0(path,patient[3]))
  
  clone_count <- function(clone){
    cc = aggregate(clone$cloneCount,by=list(clone$VDJCombination),sum)
    colnames(cc) <- c("clone","count")
    cc <- cc[order(-cc$count),]
    return(cc)
  }
  
  CD24_clone_count = clone_count(CD24)
  FCRL5_clone_count  = clone_count(FCRL5 )
  PC_clone_count = clone_count(PC)
  
  colnames(CD24_clone_count)[2] <- "CD24_clone_count"
  colnames(FCRL5_clone_count)[2] <- "FCRL5_clone_count"
  colnames(PC_clone_count)[2] <- "PC_clone_count"
  
  df <- merge(CD24_clone_count,FCRL5_clone_count,by='clone',all.x=T,all.y=T)
  df <- merge(df,PC_clone_count,by='clone',all.x=T,all.y=T)
  df[is.na(df)] = 0
  
  df$CD24_percent <- df$CD24 / sum(df$CD24)
  df$FCRL5_percent <- df$FCRL5 / sum(df$FCRL5)
  df$PC_percent <- df$PC / sum(df$PC)
  
  # plasma top10 clone
  df <- df[order(-df$PC_clone_count),]
  fwrite(df,paste0("/media/liyaru/LYR/MM2023/5_Result/14_BCR/",patient[4],"_all.csv"))
  
  df_filter <- df[1:10,]
  fwrite(df_filter,paste0("/media/liyaru/LYR/MM2023/5_Result/14_BCR/",patient[4],"_plasma_top10.csv"))
  
  # df_filter$CD24_percent <- df_filter$CD24 / sum(df_filter$CD24)
  # df_filter$FCRL5_percent <- df_filter$FCRL5 / sum(df_filter$FCRL5)
  # df_filter$PC_percent <- df_filter$PC / sum(df_filter$PC)
  
  df_long <- reshape2::melt(df_filter[,c("clone","CD24_percent","FCRL5_percent","PC_percent")],
                            id.vars="clone")
  colnames(df_long) <- c("clone","sample","percent")
  
  p <- ggplot(df_long, 
              aes(x = sample,y=percent,fill = clone,
                            stratum =  clone, alluvium =  clone))+
    geom_col(position = 'stack', width = 0.6)+
    theme_classic(base_size = 16)+
    geom_stratum(width = 0.5, color='white')+
    geom_alluvium(alpha = 0.4,
                  width = 0.5,
                  curve_type = "linear")+
    RotatedAxis()
  return(p)
}

HFB <- c("HFB_CD24/HFB_CD24.clonetype.filter.txt",
         "HFB_CD307/HFB_CD307.clonetype.filter.txt",
         "HFB_P/HFB_P.clonetype.filter.txt",
         "HFB")

DCP <- c("DCP_CD24/DCP_CD24.clonetype.filter.txt",
         "DCP_CD307/DCP_CD307.clonetype.filter.txt",
         "DCP_P/DCP_P.clonetype.filter.txt",
         "DCP")

LKS <- c("LKS_CD24/LKS_CD24.clonetype.filter.txt",
         "LKS_CD307/LKS_CD307.clonetype.filter.txt",
         "LKS_P/LKS_P.clonetype.filter.txt",
         "LKS")

p1 <- draw_plasma_top10_clone(HFB)
p2 <- draw_plasma_top10_clone(DCP)
p3 <- draw_plasma_top10_clone(LKS)

p1 + p2 + p3

Figure6

scATAC

Data

setwd("/media/liyaru/LYR/MM2023/5_Result/17_scATAC")
MM1BC <- loadArchRProject(path = "./MM1_BC") #scATAC
BC <- readRDS("/media/liyaru/LYR/MM2023/5_Result/7_Seurat/BC_new.rds") #scRNA
print(MM1BC)
## class: ArchRProject 
## outputDirectory: /media/liyaru/LYR/MM2023/5_Result/17_scATAC/MM1_BC 
## samples(1): MM1_BC
## sampleColData names(1): ArrowFiles
## cellColData names(24): Sample TSSEnrichment ... ReadsInPeaks FRIP
## numberOfCells(1): 2720
## medianTSS(1): 12.3265
## medianFrags(1): 8254.5

QC

df <- getCellColData(MM1BC, select = c("log10(nFrags)", "TSSEnrichment"))
p1 <- ggPoint(
  x = df[,1],
  y = df[,2],
  colorDensity = TRUE,
  continuousSet = "sambaNight",
  xlabel = "Log10 Unique Fragments",
  ylabel = "TSS Enrichment",
  xlim = c(log10(500), quantile(df[,1], probs = 0.99)),
  ylim = c(0, quantile(df[,2], probs = 0.99))
) + geom_hline(yintercept = 4, lty = "dashed") + geom_vline(xintercept = 3, lty = "dashed")

p1

setwd("/media/liyaru/LYR/MM2023/5_Result/17_scATAC")
p2 <- plotGroups(
  ArchRProj = MM1BC,
  colorBy = "cellColData",
  name = c("TSSEnrichment"),
  plotAs = "violin",
  alpha = 0.4,
  addBoxPlot = TRUE
)
## 1
p3 <- plotGroups(
  ArchRProj = MM1BC,
  colorBy = "cellColData",
  name = c("log10(nFrags)"),
  plotAs = "violin",
  alpha = 0.4,
  addBoxPlot = TRUE
)
## 1
p2 + p3

setwd("/media/liyaru/LYR/MM2023/5_Result/17_scATAC")
p4 <- plotFragmentSizes(ArchRProj = MM1BC)
## ArchR logging to : ArchRLogs/ArchR-plotFragmentSizes-465241e22e6a-Date-2024-08-26_Time-10-21-11.log
## If there is an issue, please report to github with logFile!
## 2024-08-26 10:21:11 : MM1_BC Computing FragmentSizes (1 of 1)!, 0.002 mins elapsed.
## 2024-08-26 10:21:30 : MM1_BC Finished Computing FragmentSizes (1 of 1)!, 0.32 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-plotFragmentSizes-465241e22e6a-Date-2024-08-26_Time-10-21-11.log
p5 <- plotTSSEnrichment(ArchRProj = MM1BC)
## ArchR logging to : ArchRLogs/ArchR-plotTSSEnrichment-46521e950715-Date-2024-08-26_Time-10-21-30.log
## If there is an issue, please report to github with logFile!
## 2024-08-26 10:21:30 : MM1_BC Computing TSS (1 of 1)!, 0.005 mins elapsed.
## 2024-08-26 10:21:55 : MM1_BC Finished Computing TSS (1 of 1)!, 0.425 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-plotTSSEnrichment-46521e950715-Date-2024-08-26_Time-10-21-30.log
p4 / p5

UMAP

setwd("/media/liyaru/LYR/MM2023/5_Result/17_scATAC")
# MM1BC@cellColData$RNA_celltype <- factor(MM1BC@cellColData$RNA_celltype,levels =  c("Pro B","Pre B","Naive B","Memory-Like 1","Memory-Like 2","Memory-Like 3"))

p1 <- plotEmbedding(ArchRProj = MM1BC, 
                    colorBy = "cellColData", name = "Clusters", 
                    embedding = "UMAP",size = 2)

p2 <- plotEmbedding(ArchRProj = MM1BC, 
                    colorBy = "cellColData", name = "RNA_celltype", 
                    embedding = "UMAP",size = 2)+
  scale_color_manual(values=c("#F47D2B","#D51F26","#89288F","#8A9FD1","#90D5E4","#208A42"))

p3 <- plotEmbedding(ArchRProj = MM1BC, 
                    colorBy = "cellColData", name = "RNA_dupli_chr1", 
                    embedding = "UMAP",size = 2)+
  scale_color_manual(values=c("grey","brown3"))


p1 + p2 + p3

Track

CD19

## ArchR logging to : ArchRLogs/ArchR-plotBrowserTrack-46527eeefcfa-Date-2024-08-26_Time-10-22-01.log
## If there is an issue, please report to github with logFile!
## 2024-08-26 10:22:01 : Validating Region, 0.002 mins elapsed.
## GRanges object with 1 range and 2 metadata columns:
##       seqnames            ranges strand |     gene_id      symbol
##          <Rle>         <IRanges>  <Rle> | <character> <character>
##   [1]    chr16 28931939-28939346      + |         930        CD19
##   -------
##   seqinfo: 24 sequences from hg38 genome
## 2024-08-26 10:22:01 : Adding Bulk Tracks (1 of 1), 0.003 mins elapsed.
## Getting Region From Arrow Files 1 of 1
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## ℹ The deprecated feature was likely used in the ArchR package.
##   Please report the issue to the authors.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## 2024-08-26 10:22:02 : Adding Gene Tracks (1 of 1), 0.011 mins elapsed.
## 2024-08-26 10:22:02 : Plotting, 0.015 mins elapsed.
## ArchR logging successful to : ArchRLogs/ArchR-plotBrowserTrack-46527eeefcfa-Date-2024-08-26_Time-10-22-01.log

CD38

## GRanges object with 1 range and 2 metadata columns:
##       seqnames            ranges strand |     gene_id      symbol
##          <Rle>         <IRanges>  <Rle> | <character> <character>
##   [1]     chr4 15778275-15853230      + |         952        CD38
##   -------
##   seqinfo: 24 sequences from hg38 genome

FCRLF5

## GRanges object with 1 range and 2 metadata columns:
##       seqnames              ranges strand |     gene_id      symbol
##          <Rle>           <IRanges>  <Rle> | <character> <character>
##   [1]     chr1 157513377-157552520      - |       83416       FCRL5
##   -------
##   seqinfo: 24 sequences from hg38 genome

## GRanges object with 1 range and 2 metadata columns:
##       seqnames              ranges strand |     gene_id      symbol
##          <Rle>           <IRanges>  <Rle> | <character> <character>
##   [1]     chr1 157513377-157552520      - |       83416       FCRL5
##   -------
##   seqinfo: 24 sequences from hg38 genome

Differential Peaks

# markerTest <- getMarkerFeatures(
#   ArchRProj = MM1BC,
#   useMatrix = "PeakMatrix",
#   groupBy = "RNA_celltype",
#   testMethod = "wilcoxon",
#   bias = c("TSSEnrichment", "log10(nFrags)"),
#   useGroups = "Memory-Like 2"
# )
# saveRDS(markerTest,"/media/liyaru/LYR/MM2023/5_Result/17_scATAC/markerTest.rds")

markerTest <- readRDS("/media/liyaru/LYR/MM2023/5_Result/17_scATAC/markerTest.rds")
pma <- plotMarkers(seMarker = markerTest, 
                  name = "Memory-Like 2", 
                  cutOff ="FDR <= 0.05 & abs(Log2FC) >= 1", 
                  plotAs = "MA")
pma

Enriched Motif

# motifsUp <- peakAnnoEnrichment(
#   seMarker = markerTest,
#   ArchRProj = MM1BC,
#   peakAnnotation = "Motif",
#   cutOff = "FDR < 0.05 & Log2FC > 1"
# )
# motifsUp
# saveRDS(motifsUp,"/media/liyaru/LYR/MM2023/5_Result/17_scATAC/motifsUp.rds")

motifsUp <- readRDS("/media/liyaru/LYR/MM2023/5_Result/17_scATAC/motifsUp.rds")
df <- data.frame(TF = rownames(motifsUp), mlog10Padj = assay(motifsUp)[,1])
df <- df[order(df$mlog10Padj, decreasing = TRUE),]
df$rank <- seq_len(nrow(df))
df <- separate(df,col = "TF",into = c("TF","TFsup"))
# fwrite(df,"/media/liyaru/LYR/MM2023/5_Result/17_scATAC/motifsUp_df.csv")

ggUp <- ggplot(df, aes(rank, mlog10Padj, color = mlog10Padj)) + 
  geom_point(size = 1) +
  ggrepel::geom_label_repel(
        data = df[1:40, ],
        aes(label = TF), size = 3,show.legend = FALSE,
        label.size=NA,fill = NA,color = "black",
        max.overlaps=25) + 
  ylab("-log10(P-adj) Motif Enrichment") + 
  xlab("Rank Sorted TFs Enriched") +
  theme_bw()+
  scale_color_gradientn(colors = paletteContinuous(set = "comet"))+
  geom_hline(yintercept = -log10(0.01),lty=3,col="black",lwd=0.5)

ggUp
## Warning: ggrepel: 10 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

Open regions

Cell_open_regions <- fread("/media/liyaru/LYR/MM2023/5_Result/17_scATAC/Open_regions.csv")
p <- ggboxplot(Cell_open_regions,
               x = "Celltype", y = "Open chromtin regions",
               fill = "Celltype")+
  RotatedAxis()+
  scale_fill_manual(values=c("grey","brown3"))+
  NoLegend()
p + stat_compare_means(method = "t.test")

Fimo result

# motif of peaks around FCRL5 
fimo <- fread("/media/liyaru/LYR/MM2023/5_Result/17_scATAC/peak_FCRL5_fimo/fimo.tsv")
## Warning in
## fread("/media/liyaru/LYR/MM2023/5_Result/17_scATAC/peak_FCRL5_fimo/fimo.tsv"):
## 在第 1186 行提前终止。预期有 10 个字段但只找到 0 个。可以考虑设置 fill=TRUE 和
## comment.char=。 首个丢弃的非空行:<<# FIMO (Find Individual Motif Occurrences):
## Version 5.1.1 compiled on Sep 24 2023 at 14:22:01>>
knitr::kable(fimo[1:10],format = "pandoc")
motif_id motif_alt_id sequence_name start stop strand score p-value q-value matched_sequence
MA0080.6 MA0080.6.Spi1 chr1:157511987-157512487 259 275 + 22.5041 0e+00 1.24e-05 AAAAAGAGGAAGTGGGC
MA0081.2 MA0081.2.SPIB chr1:157511987-157512487 260 275 - 20.3008 0e+00 1.42e-04 GCCCACTTCCTCTTTT
MA1270.1 MA1270.1.DOF3.2 chr1:157552271-157552771 311 329 - 18.5152 0e+00 1.84e-04 TTAACTTTTTTTTTTCTTT
MA1274.1 MA1274.1.DOF3.6 chr1:157552271-157552771 309 329 - 19.4091 1e-07 3.05e-04 TTAACTTTTTTTTTTCTTTGA
MA0080.6 MA0080.6.Spi1 chr1:157552271-157552771 250 266 + 19.5528 1e-07 2.03e-04 GAAAAAAGGAAGTGGGA
MA1267.1 MA1267.1.DOF5.8 chr1:157552271-157552771 313 341 - 18.8485 2e-07 6.78e-04 TTTTTTAAAACCTTAACTTTTTTTTTTCT
MA0081.2 MA0081.2.SPIB chr1:157552271-157552771 251 266 - 18.7398 2e-07 4.05e-04 TCCCACTTCCTTTTTT
MA1105.2 MA1105.2.GRHL2 chr1:157552271-157552771 144 155 + 16.1707 2e-07 8.86e-04 AAAACAGGTTTG
MA1978.1 MA1978.1.ZNF354A chr1:157566404-157566904 409 429 + 17.2381 2e-07 9.52e-04 ATAAACATAAATGATATATTT
MA1268.1 MA1268.1.CDF5 chr1:157552271-157552771 305 331 - 18.6515 2e-07 8.37e-04 CCTTAACTTTTTTTTTTCTTTGATCAT
fimo <- separate(fimo,col=c("motif_alt_id"),into = c("id","sup1","motif","sup"),remove = F)
## Warning: Expected 4 pieces. Additional pieces discarded in 7 rows [33, 192, 495, 708,
## 728, 933, 1035].
## Warning: Expected 4 pieces. Missing pieces filled with `NA` in 966 rows [1, 2, 5, 7, 8,
## 9, 10, 11, 12, 15, 16, 17, 19, 20, 23, 24, 26, 27, 29, 30, ...].
fimo <- fimo[!duplicated(fimo$motif),]
fimo$motif <- toupper(fimo$motif)
fimo <- fimo[order(fimo$`q-value`),]
fimo$rank <- 1:nrow(fimo)
fimo$log10_q <- -log10(fimo$`q-value`)
fimo$motif_seq <- paste0(fimo$motif,"(",fimo$matched_sequence,")")

p <- ggplot(fimo, aes(rank, log10_q, color = log10_q)) + 
  geom_point(size = 2) +
  ggrepel::geom_label_repel(
        data = fimo[1:10, ],
        aes(label = motif_seq), size = 3,show.legend = FALSE,
        label.size=NA,fill = NA,color = "black",
        max.overlaps=20) + 
  ylab("-log10(q-value)") + 
  xlab("Rank") +
  theme_bw()+
  scale_color_gradientn(colors = paletteContinuous(set ="solarExtra"))+
  geom_hline(yintercept = -log10(0.01),lty=3,col="black",lwd=0.5)
p

SCENIC

# vsnDir <- "/media/liyaru/LYR/MM2023/5_Result/11_SCENIC"
# scenicLoomPath <- file.path(vsnDir, "CD19.SCENIC.loom")
# file.exists(scenicLoomPath)
# 
# # read data
# loom <- open_loom(scenicLoomPath)
# exprMat <- get_dgem(loom)
# exprMat_log <- log2(exprMat+1) # Better if it is logged/normalized
# regulons_incidMat <- get_regulons(loom, column.attr.name='Regulons')
# regulons <- regulonsToGeneLists(regulons_incidMat)
# regulonAUC <- get_regulons_AUC(loom,column.attr.name='RegulonsAUC')
# regulonAucThresholds <- get_regulon_thresholds(loom)
# close_loom(loom)
# 
# cellClusters <- as.data.frame(BC@meta.data$celltype)
# rownames(cellClusters) <- rownames(BC@meta.data)
# colnames(cellClusters) <- "celltype"
# 
# selectedResolution <- "celltype"
# # Split the cells by cluster:
# cellsPerCluster <- split(rownames(cellClusters), cellClusters[,selectedResolution])
# 
# regulonAUC <- regulonAUC[onlyNonDuplicatedExtended(rownames(regulonAUC)),]
# 
# auc <- getAUC(regulonAUC)
# colnames(auc) <- gsub("HD2","HD",colnames(auc))
# saveRDS(auc,"/media/liyaru/LYR/MM2023/5_Result/11_SCENIC/tables/1.auc.rds")
# 
# # Calculate average expression:
# regulonActivity_byCellType <- sapply(cellsPerCluster,
#                                      function(cells) rowMeans(auc[,cells]))
# # Scale expression:
# regulonActivity_byCellType_Scaled <- t(scale(t(regulonActivity_byCellType), center = T, scale=T))
# 
# topRegulators <- reshape2::melt(regulonActivity_byCellType_Scaled)
# colnames(topRegulators) <- c("Regulon", "CellType", "RelativeActivity")
# topRegulators$CellType <- factor(as.character(topRegulators$CellType))
# fwrite(topRegulators,"/media/liyaru/LYR/MM2023/5_Result/11_SCENIC/tables/1.topRegulators.csv")
# 
# top_regu <- topRegulators %>%
#   group_by(CellType) %>%
#   top_n(n=10,wt=RelativeActivity)
# top_regu <- top_regu[!duplicated(top_regu$Regulon),]
# 
# # order
# top_regu$CellType <- factor(top_regu$CellType,
#                             levels = c("Pro B","Pre B","Naive B","Memory-Like 1","Memory-Like 2","Memory-Like 3"))
# top_regu <- top_regu[order(top_regu$CellType,-top_regu$RelativeActivity),]
# 
# top_regu_matrix <- regulonActivity_byCellType_Scaled[top_regu$Regulon,]
# fwrite(as.data.frame(top_regu_matrix),"/media/liyaru/LYR/MM2023/5_Result/11_SCENIC/tables/1.regulonAUC.heatmap.csv",row.names = T)

top_regu_matrix <- fread("/media/liyaru/LYR/MM2023/5_Result/11_SCENIC/tables/1.regulonAUC.heatmap.csv") %>% as.data.frame()
rownames(top_regu_matrix) <- top_regu_matrix$V1
top_regu_matrix <- top_regu_matrix[, -1]

anotation_col <- data.frame(celltype = colnames(top_regu_matrix))
colB_scenic <- col_B
names(colB_scenic) <- colnames(top_regu_matrix)
ann_colors <- list(celltype=colB_scenic)

p <- pheatmap(top_regu_matrix,cluster_rows = F,cluster_cols = F,
         border_color = NA,
         gaps_row = c(10,20,30,40,50),
         color = rdwhbu(100),
         show_rownames = F,
         annotation_col = anotation_col,
         annotation_colors = ann_colors)
## Warning: The input is a data frame, convert it to the matrix.
p

auc <- readRDS("/media/liyaru/LYR/MM2023/5_Result/11_SCENIC/tables/1.auc.rds")

SPI1auc = as.data.frame(auc['SPI1(+)',])
BC <- AddMetaData(BC,metadata = SPI1auc,col.name = "SPI1_regulonAUC")

FeaturePlot(BC,features ="SPI1_regulonAUC",pt.size = 0.8)&
  scale_color_gradientn(colors = c(rdwhbu(100)))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Venn (TF overlap)

library(VennDiagram)
## 载入需要的程辑包:futile.logger
## 
## 载入程辑包:'VennDiagram'
## The following object is masked from 'package:ggpubr':
## 
##     rotate
# enriched motif
motifsUp <- readRDS("/media/liyaru/LYR/MM2023/5_Result/17_scATAC/motifsUp.rds")
df <- data.frame(TF = rownames(motifsUp), mlog10Padj = assay(motifsUp)[,1])
df <- df[order(df$mlog10Padj, decreasing = TRUE),]
df$rank <- seq_len(nrow(df))
df <- separate(df,col = "TF",into = c("TF","TFsup"))
df <- df[df$mlog10Padj > (-log10(0.01)),]
Mem2_enriched_motif <- unique(df$TF)

# motif of peaks around FCRL5 
fimo <- fread("/media/liyaru/LYR/MM2023/5_Result/17_scATAC/peak_FCRL5_fimo/fimo.tsv")
fimo <- separate(fimo,col=c("motif_alt_id"),into = c("id","sup1","motif","sup"),remove = F)
fimo <- fimo[fimo$`q-value` < 0.01,]
FCRL5_motif <- unique(fimo$motif) %>% toupper()

topRegulators <- fread("/media/liyaru/LYR/MM2023/5_Result/11_SCENIC/tables/1.topRegulators.csv") %>% as.data.frame()
topRegulators <- topRegulators[topRegulators$CellType == "Memory-Like 2",]
topRegulators <- topRegulators[order(-topRegulators$RelativeActivity),]
topRegulators <- topRegulators[1:10,]
topRegulators$TF <- gsub("[(+)]","",topRegulators$Regulon)
SCENIC_mem2_TF <- topRegulators$TF

TF <- c(list(Mem2_enriched_motif),list(FCRL5_motif),list(SCENIC_mem2_TF))
#names(TF) <- c("Mem2 scATAC enriched motif","Motif in peaks of FCRL5","Top10 regulon activity")
names(TF) <- c("Enrichment","Regulate FCRL5","High Activity")
p = venn.diagram(
  x = TF,
  filename=NULL,
  fill= c("#EACEE3","#89C75F","#A1CDE1"),
  width = 1000,
  height = 1000, 
)

grid.draw(p)

enrich_FCRL5 <- intersect(Mem2_enriched_motif,FCRL5_motif)
print("Mem2 scATAC enriched motif (Enrichment)  &  Motif in peaks of FCRL5 (Regulate FCRL5)")
## [1] "Mem2 scATAC enriched motif (Enrichment)  &  Motif in peaks of FCRL5 (Regulate FCRL5)"
print(enrich_FCRL5)
## [1] "SPIB"   "SPIC"   "SPI1"   "POU6F1"
enrich_act <- intersect(Mem2_enriched_motif,SCENIC_mem2_TF)
print("Mem2 scATAC enriched motif (Enrichment)  &  Top10 regulon activity (High Activity)")
## [1] "Mem2 scATAC enriched motif (Enrichment)  &  Top10 regulon activity (High Activity)"
print(enrich_act)
## [1] "BATF"  "SPI1"  "TBX21"